# Dickstein, Ho, and Mark (2023)
# This R script creates a dataset of premiums (both net and gross) at the subscriber - constructed plan - year observation level.

# Preliminaries
setwd("../library")
source("PreliminariesCode.R")

# Loading Data

## Constructed Plans
ConstructedPlanDat <- fread("orig/ConstructedPlanDat.csv")

## Subscriber Data
SubDat_List<- list()
for (i in 5:7){
  SubDat_List[[i]] <- fread(paste0("orig/SubData", Shortyear.Vec[i], "7.csv"))
}
SubDat_All <- do.call("rbind", SubDat_List[5:7])

## Second Cheapest Silver Plan
SecondCheapestSilver <- fread("orig/SecondCheapestSilver.csv")

## Merging the two
SubDat_All <- merge(
  SubDat_All[, .(
    year, subscriberid, best_guess_applicable_ra, exchange, markettype, best_guess_metal, best_guess_plantype, 
    RatioSum, best_guess_incomeoverFPL,
    frate10, frate50, frate90, best_guess_frate,
    srate10, srate50, srate90, best_guess_srate,
    PremiumCap10, PremiumCap50, PremiumCap90, best_guess_PremiumCap, bg_metal_qual)],
  SecondCheapestSilver,
  by.x = c("best_guess_applicable_ra", "year"),
  by.y = c("ratingarea", "year"),
  all.x = T)

# Defining the estimated subsidy
SubDat_All$Subsidy10 <- (SubDat_All$RatioSum * SubDat_All$SecondCheapestPrem - (SubDat_All$PremiumCap10 / 12))
SubDat_All$Subsidy50 <- (SubDat_All$RatioSum * SubDat_All$SecondCheapestPrem - (SubDat_All$PremiumCap50 / 12))
SubDat_All$Subsidy90 <- (SubDat_All$RatioSum * SubDat_All$SecondCheapestPrem - (SubDat_All$PremiumCap90 / 12))
SubDat_All$best_guess_Subsidy <- (SubDat_All$RatioSum * SubDat_All$SecondCheapestPrem - (SubDat_All$best_guess_PremiumCap / 12))

# If estimated subsidy is negative, then the true subsidy is equal to zero
SubDat_All$Subsidy10[SubDat_All$Subsidy10 < 0] <- 0
SubDat_All$Subsidy50[SubDat_All$Subsidy50 < 0] <- 0
SubDat_All$Subsidy90[SubDat_All$Subsidy90 < 0] <- 0
SubDat_All$best_guess_Subsidy[SubDat_All$best_guess_Subsidy < 0] <- 0

# For Those with FPL above 4, they have 0 subsidies
SubDat_All$Subsidy10[SubDat_All$IncomeOverFPL10 > 4] <- 0
SubDat_All$Subsidy50[SubDat_All$IncomeOverFPL50 > 4] <- 0
SubDat_All$Subsidy90[SubDat_All$IncomeOverFPL90 > 4] <- 0
SubDat_All$best_guess_Subsidy[SubDat_All$best_guess_incomeoverFPL > 4] <- 0

# Merging with constructed plan data
names(SubDat_All)[which(names(SubDat_All) %in% c("best_guess_applicable_ra", "exchange", "markettype"))] <- c("ratingarea", "Exchange", "Market.Type")
PremiumData <- ConstructedPlanDat %>%
  left_join(SubDat_All, by = c("year", "ratingarea", "Exchange", "Market.Type"))

# Creating Plan Actuarial Value: 
PremiumData$plan_AV <- ifelse(
  PremiumData$metal == 2, 60,
    ifelse(PremiumData$metal == 3, 70,
      ifelse(PremiumData$metal == 4, 80,
        ifelse(PremiumData$metal == 5, 90, NA))))

# Creating Best Guess Actuarial Value:
PremiumData$best_guess_AV <- ifelse(
  PremiumData$metal == 2, 60,
    ifelse(PremiumData$metal == 3, 70,
      ifelse(PremiumData$metal == 4, 80,
        ifelse(PremiumData$metal == 5, 90, NA))))
PremiumData$best_guess_AV <- ifelse(
  PremiumData$metal == 3 & PremiumData$best_guess_incomeoverFPL >= 1 & PremiumData$best_guess_incomeoverFPL <= 1.5, 
  94,
  PremiumData$best_guess_AV)
PremiumData$best_guess_AV <- ifelse(
  PremiumData$metal == 3 & PremiumData$best_guess_incomeoverFPL > 1.5 & PremiumData$best_guess_incomeoverFPL <= 2, 
  87,
  PremiumData$best_guess_AV)
PremiumData$best_guess_AV <- ifelse(
  PremiumData$metal == 3 & PremiumData$best_guess_incomeoverFPL > 2 & PremiumData$best_guess_incomeoverFPL <= 2.5, 
  73,
  PremiumData$best_guess_AV)

# Kondo!
PremiumData <- data.table(PremiumData)[, .(
  subscriberid, year, PAYER_ID, ratingarea, Exchange, Market.Type, metal, MNC,
  grossprem = RatioSum * Individual_Rate,
  frate10, frate50, best_guess_frate,
  srate10, srate50, best_guess_srate,
  Subsidy10, Subsidy50, best_guess_Subsidy,
  best_guess_AV, plan_AV, bg_metal_qual), ]

# Creating Net Premiums
PremiumData$netprem10 = ifelse(
  PremiumData$Market.Type == "Individual",
  ifelse(PremiumData$Exchange == "Off Exchange",
     PremiumData$grossprem,
     pmax(PremiumData$grossprem - PremiumData$Subsidy10, 0)),
   PremiumData$grossprem * (100 - PremiumData$frate10 - PremiumData$srate10) / 100)

PremiumData$netprem50 = ifelse(
  PremiumData$Market.Type == "Individual",
  ifelse(PremiumData$Exchange == "Off Exchange",
    PremiumData$grossprem,
    pmax(PremiumData$grossprem - PremiumData$Subsidy50, 0)),
  PremiumData$grossprem * (100 - PremiumData$frate50 - PremiumData$srate50) / 100)

PremiumData$best_guess_netprem = ifelse(
  PremiumData$Market.Type == "Individual",
  ifelse(PremiumData$Exchange == "Off Exchange",
    PremiumData$grossprem,
    pmax(PremiumData$grossprem - PremiumData$best_guess_Subsidy, 0)),
  PremiumData$grossprem * (100 - PremiumData$best_guess_frate - PremiumData$best_guess_srate) / 100)

# Finally, create a new constructed plan id
PremiumData[, constructed_plan_year := paste0(
  substr(Market.Type, 0, 2),
  substr(Exchange, 0, 2),
  ratingarea,
  substr(PAYER_ID, 4, 5),
  metal,
  MNC,
  "_",
  year
), ]

# Restricting to non-missing observations
PremiumData <- PremiumData[!is.na(Market.Type) & !is.na(Exchange) & !is.na(ratingarea) & !is.na(PAYER_ID) & !is.na(metal) & !is.na(MNC)]

# Save the all choice premiums dataset
fwrite(PremiumData, "AllChoicePremiums.csv")
